[Proposed] Split

Author

김보람

Published

April 2, 2024

imports

import pandas as pd
import numpy as np
import sklearn
import pickle 
import time 
import datetime
import warnings
warnings.filterwarnings('ignore')
%run ../function_proposed_gcn.py
# Load the pickled `fraudTrain` object that every experiment below receives as
# its first argument (presumably a pandas DataFrame — confirm against the
# code that wrote the pickle).
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a ../fraudTrain.pkl that comes from a trusted source.
with open('../fraudTrain.pkl', 'rb') as file:
    fraudTrain = pickle.load(file)    
# Run try_5 once per (second argument, fourth argument) pair, in the listed
# order, with the third argument fixed at 11406996. Every call after the first
# receives the table so far through prev_results, so df_results ends up with
# one row per pair.
# NOTE(review): in the recorded output the third argument shows up in the
# `theta` column and the fourth in `gamma`; the meaning of the second argument
# (10/9/8/7) is not visible in this file — confirm in function_proposed_gcn.py.
sweep_pairs = [
    (10, 0.8), (10, 0.9), (10, 0.7),
    (9, 0.9), (9, 0.8), (9, 0.7),
    (8, 0.9), (8, 0.8), (8, 0.7),
    (7, 0.9), (7, 0.8), (7, 0.7),
]

# The first run starts a fresh table, so it is made without prev_results.
head_arg, head_gamma = sweep_pairs[0]
df_results = try_5(fraudTrain, head_arg, 11406996, head_gamma)
for next_arg, next_gamma in sweep_pairs[1:]:
    df_results = try_5(
        fraudTrain, next_arg, 11406996, next_gamma, prev_results=df_results
    )

# Save under a local-time timestamp so each sweep gets its own file.
ymdhms = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'../results/{ymdhms}-proposed.csv', index=False)

df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.917783 0.016590 0.947368 0.032609 0.968007 True Proposed 0.131127 9009 amt 0.505051 25980 0.001463 None 11406996 0.8
1 GCN None 0.938217 0.022547 0.973684 0.044074 0.982845 True Proposed 0.129820 9009 amt 0.499944 25978 0.001463 None 11406996 0.9
2 GCN None 0.886999 0.012450 0.973684 0.024585 0.960453 True Proposed 0.129633 9009 amt 0.499278 25982 0.001463 None 11406996 0.7
3 GCN None 0.941596 0.024899 0.934783 0.048505 0.984682 True Proposed 0.120230 9009 amt 0.500611 28885 0.001593 None 11406996 0.9
4 GCN None 0.915980 0.018661 0.867925 0.036537 0.947152 True Proposed 0.120450 9009 amt 0.500611 28874 0.001836 None 11406996 0.8
5 GCN None 0.892921 0.011832 0.948718 0.023373 0.958848 True Proposed 0.120417 9009 amt 0.502054 28876 0.001351 None 11406996 0.7
6 GCN None 0.940046 0.022579 0.978261 0.044139 0.984014 True Proposed 0.109714 9009 amt 0.500500 32508 0.001415 None 11406996 0.9
7 GCN None 0.921472 0.012408 0.864865 0.024465 0.963446 True Proposed 0.109427 9009 amt 0.500056 32498 0.001139 None 11406996 0.8
8 GCN None 0.895498 0.011357 0.975000 0.022453 0.960901 True Proposed 0.108611 9009 amt 0.495948 32497 0.001231 None 11406996 0.7
9 GCN None 0.947098 0.032544 0.970588 0.062977 0.985970 True Proposed 0.098821 9009 amt 0.498501 37125 0.001832 None 11406996 0.9
10 GCN None 0.911643 0.012940 1.000000 0.025550 0.980952 True Proposed 0.099066 9009 amt 0.502498 37122 0.001158 None 11406996 0.8
11 GCN None 0.894713 0.013640 0.931034 0.026886 0.969827 True Proposed 0.098448 9009 amt 0.497724 37127 0.001562 None 11406996 0.7
# Run try_5 with the second argument stepping 10, 9, ..., 5 while the third
# (1e+7) and fourth (0.8) arguments stay fixed; rows accumulate through
# prev_results.
# NOTE(review): the recorded output shows 1e+7 in the `theta` column and 0.8
# in `gamma`; the meaning of the second argument is not visible here.
df_results = try_5(fraudTrain, 10, 1e+7, 0.8)  # first run: no previous table
for second_arg in range(9, 4, -1):  # 9, 8, 7, 6, 5
    df_results = try_5(fraudTrain, second_arg, 1e+7, 0.8, prev_results=df_results)

# Save under a local-time timestamp so each sweep gets its own file.
ymdhms = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'../results/{ymdhms}-proposed.csv', index=False)

df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.919618 0.016957 0.972973 0.033333 0.978765 True Proposed 0.129770 9009 amt 0.499833 25976 0.001424 None 10000000.0 0.8
1 GCN None 0.922709 0.014153 0.914286 0.027875 0.946846 True Proposed 0.118827 9009 amt 0.495837 28878 0.001212 None 10000000.0 0.8
2 GCN None 0.913900 0.017907 0.980769 0.035172 0.965836 True Proposed 0.109695 9009 amt 0.499611 32497 0.001600 None 10000000.0 0.8
3 GCN None 0.918746 0.013098 0.952381 0.025840 0.972711 True Proposed 0.098272 9009 amt 0.498501 37118 0.001132 None 10000000.0 0.8
4 GCN None 0.926632 0.020352 0.985075 0.039879 0.980303 True Proposed 0.086689 9009 amt 0.496059 43316 0.001547 None 10000000.0 0.8
5 GCN None 0.926365 0.017738 0.932432 0.034813 0.974309 True Proposed 0.074957 9009 amt 0.499056 51959 0.001424 None 10000000.0 0.8
# Run try_7 with its second argument going from 0.9 down to 0.1 (0.5 is not
# part of this sweep) while the remaining arguments stay at 10, 1e+7, 0.8;
# rows accumulate through prev_results.
# NOTE(review): in the recorded output the `train_frate` column closely
# follows the second argument (e.g. 0.900480 for 0.9) — presumably it is the
# target training fraud rate; confirm in function_proposed_gcn.py.
df_results = try_7(fraudTrain, 0.9, 10, 1e+7, 0.8)  # first run: no previous table
for rate in (0.8, 0.7, 0.6, 0.4, 0.3, 0.2, 0.1):
    df_results = try_7(fraudTrain, rate, 10, 1e+7, 0.8, prev_results=df_results)

# Save under a local-time timestamp so each sweep gets its own file.
ymdhms = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'../results/{ymdhms}-proposed.csv', index=False)

df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.818217 0.006495 1.000000 0.012906 0.964933 True Proposed 0.145931 5004 amt 0.900480 26086 0.001188 None 10000000.0 0.8
1 GCN None 0.881583 0.011848 0.973684 0.023410 0.965544 True Proposed 0.143569 5630 amt 0.801599 26069 0.001458 None 10000000.0 0.8
2 GCN None 0.896080 0.014925 0.976190 0.029401 0.974247 True Proposed 0.139946 6435 amt 0.699922 26049 0.001612 None 10000000.0 0.8
3 GCN None 0.909559 0.010531 0.862069 0.020807 0.969194 True Proposed 0.135187 7507 amt 0.599840 26017 0.001115 None 10000000.0 0.8
4 GCN None 0.933647 0.022740 0.975610 0.044444 0.981265 True Proposed 0.122906 11261 amt 0.402185 25922 0.001582 None 10000000.0 0.8
5 GCN None 0.660591 0.005782 1.000000 0.011497 0.981600 True Proposed 0.111030 15015 amt 0.298701 25839 0.001974 None 10000000.0 0.8
6 GCN None 0.956188 0.032787 0.926829 0.063333 0.984209 True Proposed 0.094651 22522 amt 0.200648 25655 0.001598 None 10000000.0 0.8
7 GCN None 0.978864 0.049180 0.771429 0.092466 0.986971 True Proposed 0.065230 45045 amt 0.100766 25076 0.001396 None 10000000.0 0.8
# Run try_7 with its second argument going from 0.09 down to 0.01 (0.05 is not
# part of this sweep) while the remaining arguments stay at 10, 1e+7, 0.8;
# rows accumulate through prev_results.
#
# The notebook records that a cell in this part of the file kills the kernel.
# Previously the CSV was written only after the last run, so a crash discarded
# every run that had already finished. The output path is therefore fixed up
# front and the table is rewritten after each run; when all runs succeed the
# file holds exactly the same final table as before.
ymdhms = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S')
results_path = f'../results/{ymdhms}-proposed.csv'

df_results = try_7(fraudTrain, 0.09, 10, 1e+7, 0.8)  # first run: no previous table
df_results.to_csv(results_path, index=False)
for rate in (0.08, 0.07, 0.06, 0.04, 0.03, 0.02, 0.01):
    df_results = try_7(fraudTrain, rate, 10, 1e+7, 0.8, prev_results=df_results)
    # Checkpoint: overwrite the same file with the rows collected so far.
    df_results.to_csv(results_path, index=False)

df_results

주의: 이거 실행하면 커널이 죽음. (Note: running this kills the kernel.)

# Run try_7 with its second argument going from 0.009 down to 0.001 (0.005 is
# not part of this sweep) while the remaining arguments stay at 10, 1e+7, 0.8;
# rows accumulate through prev_results.
#
# The notebook records that a cell in this part of the file kills the kernel.
# Previously the CSV was written only after the last run, so a crash discarded
# every run that had already finished. The output path is therefore fixed up
# front and the table is rewritten after each run; when all runs succeed the
# file holds exactly the same final table as before.
ymdhms = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H%M%S')
results_path = f'../results/{ymdhms}-proposed.csv'

df_results = try_7(fraudTrain, 0.009, 10, 1e+7, 0.8)  # first run: no previous table
df_results.to_csv(results_path, index=False)
for rate in (0.008, 0.007, 0.006, 0.004, 0.003, 0.002, 0.001):
    df_results = try_7(fraudTrain, rate, 10, 1e+7, 0.8, prev_results=df_results)
    # Checkpoint: overwrite the same file with the rows collected so far.
    df_results.to_csv(results_path, index=False)

df_results